Scenario

Within this chapter developmental plasticity was explored within Acanthochromis polyacanthus that were collected from two different regions (i.e., low-latitude, Cairns, and high-latitude, Mackay). Fish were held in common garden experiments at 28.5 C. Clutch data was collected along with parental morphometeric data to determine how fish from each population performed at 28.5 C, a temperature that was shown to produce similar absoluate aerobic scope performances in a previous study [link]. Once hatched offspring were placed into three different temperature treatments, 28.5, 30, and 31.5 C. After approximately 5-6 months offspring length and weight was measured, as well as CTmax and respiration during CTmax trials.

Load packages

library(tidyverse) # data manipulation
library(ggpubr) # figure arrangement 
library(brms) # Bayesian models
library(StanHeaders)# needed to run Bayesian models
library(rstan) # needed to run Bayesian models
library(standist) # needs to be installed 
library(bayesplot) # needed for MCMC diagnostics 
library(DHARMa) # model validation 
library(ggdist) # partial plots 
library(tidybayes) # partial plots 
library(broom.mixed) # model investigation
library(emmeans) # pairwise comparisons
library(rstanarm) # pairwise comparisons - need for emmeans

Set working directory

knitr::opts_knit$set(root.dir=working.dir)

Import data

growth <- read_delim("import_files/growth_data.txt", 
    delim = "\t", escape_double = FALSE, 
    col_types = cols(NOTES = col_skip(), 
        ...16 = col_skip(), ...17 = col_skip()), 
    trim_ws = TRUE) 

clutch_data <- read_delim("import_files/clutch_data_2022_2023.txt", 
    delim = "\t", escape_double = FALSE, 
    trim_ws = TRUE) %>% 
  mutate(CLUTCH_NUMBER = as.factor(CLUTCH_NUMBER))

Data manipulation

density <- count(growth, CLUTCH_NUMBER, TANK) |> 
  rename(DENSITY = n) |> 
  mutate(CLUTCH_NUMBER = as.factor(CLUTCH_NUMBER), 
         TANK = as.factor(TANK)) 
growth2 <- growth |> 
  mutate(CLUTCH_NUMBER = as.factor(CLUTCH_NUMBER), 
         MALE = as.factor(MALE), 
         FEMALE = as.factor(FEMALE), 
         REGION = as.factor(REGION), 
         POPULATION = as.factor(POPULATION), 
         DATE_OF_HATCH = as.Date(DATE_OF_HATCH, format = "%d/%m/%Y"), 
         DATE_SAMPLED = as.Date(DATE_SAMPLED, format = "%d/%m/%Y"), 
         DEV_TEMP = as.factor(DEV_TEMP), 
         TANK = as.factor(TANK), 
         REP = as.factor(REP), 
         LENGTH = as.numeric(LENGTH), 
         MASS = as.numeric(MASS), 
         FULTONK = (1000*MASS)/(LENGTH^3), # Adding FULTON'S K metric
         EXPERIMENT = as.factor(EXPERIMENT)) |> 
  full_join(select(clutch_data, c("CLUTCH_NUMBER",
                                   "MALE_STANDARD_LENGTH", 
                                   "MALE_MASS", 
                                   "MALE_LAT", 
                                   "MALE_LONG", 
                                   "FEMALE_STANDARD_LENGTH", 
                                   "FEMALE_MASS", 
                                   "FEMALE_LAT", 
                                   "FEMALE_LONG", 
                                   "CLUTCH_ORDER", 
                                   "DAYS_IN_TREATMENT", 
                                   "EGG_COUNT", 
                                   "HATCHING_SUCCESS")), by = "CLUTCH_NUMBER") |> 
  select(c(1:6,16:27,7:13,15,14)) |> 
  drop_na(EXPERIMENT) |>
  mutate(CLUTCH_ORDER = as.factor(CLUTCH_ORDER), 
         EXP_GROUP = as.factor(paste0(REGION,"_",DEV_TEMP))) |> 
  inner_join(density, by=c("CLUTCH_NUMBER","TANK")) |> 
  mutate(cLENGTH = scale(LENGTH, scale = FALSE)[,1], 
         cMASS = scale(MASS, scale=FALSE)[,1], 
         cFULTONK = scale(FULTONK, scale=FALSE)[,1], 
         cMALE_STANDARD_LENGTH = scale(MALE_STANDARD_LENGTH, scale=FALSE)[,1], 
         cMALE_MASS = scale(MALE_MASS, scale=FALSE)[,1], 
         cFEMALE_STANDARD_LENGTH = scale(MALE_STANDARD_LENGTH, scale=FALSE)[,1], 
         cFEMALE_MASS = scale(MALE_MASS, scale=FALSE)[,1], 
         cDENSITY = scale(DENSITY, scale=FALSE)[,1], 
         cAGE_DAYS = scale(AGE_DAYS, scale=FALSE)[,1],
         TANK = as.numeric(as.character(TANK)),
         LEVEL = as.factor(case_when(TANK >= 1 & TANK <= 199 ~ 1,
                           TANK >= 200 & TANK <= 299 ~ 2,
                           TANK >= 300 & TANK <= 399 ~ 3,
                           TRUE ~ NA_real_))) |> 
  mutate(TANK = as.factor(TANK)) |> 
  filter(MASS < 4, na.rm=TRUE, 
         FULTONK < 0.079, 
         FULTONK > 0.01, 
         DENSITY > 3)  

In the code above a number of variables were centered because within these metrics the value 0 is meaningless. For example you cannot have a fish length or mass of 0. Therefore, the mean was subtracted for a given variable was subtracted by every value. The y-intercept for these values will therefore reflect the mean, and the slope can be interpreted as ‘y increases/decreases x amount for every 1-unit increase from the mean [INSERT METRIC]’.

Exploratory data analysis

LENGTH VS. MASS

GROUP COMPARISONS

NOTE: On some of the figures ylimits have been set and therefore outliers are hidden

distribution

Parent mass and length

male.mass.plot <- ggplot(growth2, aes(x=MALE_MASS, y=MASS)) + 
  geom_point(alpha = 0.5) + 
  ggtitle("Farther vs. offspring mass") +
  theme_classic() 

female.mass.plot <- ggplot(growth2, aes(x=FEMALE_MASS, y=MASS)) + 
  geom_point(alpha = 0.5) + 
  ggtitle("Mother vs. offspring mass") +
  theme_classic()  

male.length.plot <- ggplot(growth2, aes(x=MALE_STANDARD_LENGTH, y=LENGTH)) + 
  geom_point(alpha = 0.5) + 
  ggtitle("Farther vs. offspring length") +
  theme_classic() 

female.length.plot <- ggplot(growth2, aes(x=FEMALE_STANDARD_LENGTH, y=LENGTH)) + 
  geom_point(alpha = 0.5) + 
  ggtitle("Mother vs. offspring length") +
  theme_classic()  

ggarrange(male.mass.plot, female.mass.plot, male.length.plot, female.length.plot, 
          ncol=2, 
          nrow=2)

# {-}

Fit model

Great lets start to fit our models. There are number of variables within our data frame some of which may or may not be important. To investigate which variables are important and which are not will test different hypotheses that use different combinations of variables that are suited to answer reasonable hypotheses.

We will start modelling the standard length of juveniles.

First we will test 3 different hypotheses as described below in more detail. All hypotheses-testing models will have LENGTH as the response variable as well as DEV_TEMP and DENSITY as independent variables. Note that within these models there are no random variables, we will examine random variables in later steps.

  1. The first model will test that hypotheses that juvenile length at different developmental temperatures will also be heavily influenced by the length of the parents. Therefore, this model will include both MALE_STANDARD_LENGTH and FEMALE_STANDARD_LENGTH. However, it is important to note that the length of parents will like be related to each other (i.e., collinearity). We will confirm this assumption with some model validation descriptive statistics. This hypothesis can be seen as out ‘basic’ model.
cLENGTH ~ DEV_TEMP + cFEMALE_STANDARD_LENGTH + cDENSITY

NOTE: Although not displayed this model will only include FEMALE_STANDARD_LENGTH, when both MALE_ and FEMALE_ standard length are included the model is very unhappy due to the collinearity between the two variables.

  1. The second model is based around the clutch/maternal conditions. Additional variables will include FEMALE_STANDARD_LENGTH, DAYS_IN_TEMPERATURE -applies to parents, EGG_COUNT, and HATCHING_SUCCESS. The idea of this hypothesis is that offspring length may be determined by early life conditions such as the size of the mother, how long the mother was exposed to potentially stressful conditions, how wide or narrow reproductive energy was spread, as well as the potential ‘health’ (as indicated by hatching success) of the clutch.
cLENGTH ~ DEV_TEMP + cFEMALE_STANDARD_LENGTH + cDENSITY + DAYS_IN_TREATMENT + EGG_COUNT + HATCHING_SUCCESS
  1. The last model is a geographic model and will include FEMALE_STANDARD_LENGTH, FEMALE_LATITUDE, REGION and DENSITY. In all but a few cases the farther and mother are from the same reef, therefore FEMALE_LATITUDE should be the same as MALE_LATITUDE. In the event that maternal and paternal sources were from different reefs, both parents would have come from similar latitudes. POPULATION was not included as it caused convergence issues, potentially due to the limited sample size from different populations.
cLENGTH ~ DEV_TEMP*REGION + cFEMALE_STANDARD_LENGTH + cDENSITY + FEMALE_LAT + REGION

Once these models are run will be investigate their summary statistics to determine which factors were/were not deemed to have an impact on variation explained. Further models containing combinations of variables may also be explored depending on results.

Also because we are doing Bayesian modelling we will be listing priors. Our length data is normally distributed as shown in some of the exploratory data analysis plots. We can confirm this assumption during the model validation stage as well. Out starting priors will be uninformative priors with distribution of gaussian(0,1), this may or may not change as we progress.

Basic model

model1 <- brm(cMASS ~ DEV_TEMP*REGION + cFEMALE_MASS + cDENSITY + cAGE_DAYS + cLENGTH,
              family=gaussian(),
              data = growth2, 
              warmup = 4000, 
              iter = 10000,
              seed=123, 
              cores=2, 
              save_pars = save_pars(all=TRUE),  
              chains = 4, 
              thin = 5) 
## Compiling Stan program...
## Start sampling
prior_summary(model1) 

In this summary statistic table it tells us:

  • for the intercept, it is using a student t(flatter normal) prior with a mean of -0.1 and a standard deviation of 2.5. These values are derived from the median and median absolute deviation of the response variable
standist::visualize("student_t(3,-0.1,2.5)", xlim=c(-5,5))

standist::visualize("student_t(3,-0.1,2.5)", xlim=c(-5,5))

  • for the beta coefficients, including cDENSITY, cFEMALE_STANDARD_LENGTH, DEV_TEMP, the default priors are inproper flat priors.

  • The prior on sigma is also a student t with a mean of 0 and a standard deviation of 0.407

MCMC sampling diagnostics

bayesplot

mcmc_plot(model1, type='combo')

mcmc_plot(model1, type='trace') 
## No divergences to plot.

mcmc_plot(model1, type='dens_overlay') 

mcmc_plot(model1, type='acf_bar')

mcmc_plot(model1, type='rhat_hist')
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

mcmc_plot(model1, type='neff_hist') 
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stan plots

stan_trace(model1$fit)
## 'pars' not specified. Showing first 10 parameters by default.

stan_ac(model1$fit) 
## 'pars' not specified. Showing first 10 parameters by default.

stan_rhat(model1$fit) 
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stan_ess(model1$fit)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stan_dens(model1$fit, separate_chains = TRUE)
## 'pars' not specified. Showing first 10 parameters by default.

Summary

summary(model1)
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: cMASS ~ DEV_TEMP * REGION + cFEMALE_MASS + cDENSITY + cAGE_DAYS + cLENGTH 
##    Data: growth2 (Number of observations: 1959) 
##   Draws: 4 chains, each with iter = 10000; warmup = 4000; thin = 5;
##          total post-warmup draws = 4800
## 
## Population-Level Effects: 
##                            Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept                     -0.04      0.01    -0.07    -0.02 1.00     4506
## DEV_TEMP30                     0.04      0.02     0.00     0.07 1.00     4341
## DEV_TEMP31.5                   0.08      0.02     0.05     0.12 1.00     4568
## REGIONleading                  0.03      0.02    -0.00     0.06 1.00     4355
## cFEMALE_MASS                  -0.00      0.00    -0.00    -0.00 1.00     4514
## cDENSITY                      -0.02      0.00    -0.02    -0.02 1.00     4779
## cAGE_DAYS                     -0.00      0.00    -0.00     0.00 1.00     4733
## cLENGTH                        0.09      0.00     0.09     0.10 1.00     4960
## DEV_TEMP30:REGIONleading      -0.02      0.02    -0.07     0.02 1.00     4291
## DEV_TEMP31.5:REGIONleading    -0.07      0.02    -0.11    -0.02 1.00     4304
##                            Tail_ESS
## Intercept                      4685
## DEV_TEMP30                     4483
## DEV_TEMP31.5                   4532
## REGIONleading                  4365
## cFEMALE_MASS                   4526
## cDENSITY                       4652
## cAGE_DAYS                      4416
## cLENGTH                        4619
## DEV_TEMP30:REGIONleading       4524
## DEV_TEMP31.5:REGIONleading     4461
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     0.21      0.00     0.21     0.22 1.00     4534     4593
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

Within this model cFEMALE_MASS, and cAGE_DAYS have little influence on the model.

Maternal effects model

model.mat <- brm(cMASS ~ DEV_TEMP*REGION + cFEMALE_MASS + cDENSITY + cAGE_DAYS + DAYS_IN_TREATMENT + EGG_COUNT + HATCHING_SUCCESS + CLUTCH_ORDER + cLENGTH,
              family=gaussian(),
              data = growth2, 
              warmup = 4000, 
              iter = 10000,
              seed=123, 
              cores=2, 
              save_pars = save_pars(all=TRUE),  
              chains = 4, 
              thin = 5, 
              refresh = 0) 
## Warning: Rows containing NAs were excluded from the model.
## Compiling Stan program...
## Start sampling
prior_summary(model.mat) 

In this summary statistic table it tells us:

  • for the intercept, it is using a student t(flatter normal) prior with a mean of -0.1 and a standard deviation of 2.5. These values are derived from the median and median absolute deviation of the response variable.
standist::visualize("student_t(3,-0.1,2.5)", xlim=c(-10,10))

standist::visualize("student_t(3,-0.1,2.5)", xlim=c(-10,10))

  • for the beta coefficients, including cDENSITY, cFEMALE_STANDARD_LENGTH, DEV_TEMP, the default priors are inproper flat priors.

  • The prior on sigma is also a student t with a mean of 0 and a standard deviation of 3.8

MCMC sampling diagnostics

bayesplot

mcmc_plot(model.mat, type='combo')

mcmc_plot(model.mat, type='trace') 
## No divergences to plot.

mcmc_plot(model.mat, type='dens_overlay') 

mcmc_plot(model.mat, type='acf_bar')

mcmc_plot(model.mat, type='rhat_hist')
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

mcmc_plot(model.mat, type='neff_hist') 
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stan plots

stan_trace(model.mat$fit)
## 'pars' not specified. Showing first 10 parameters by default.

stan_ac(model.mat$fit) 
## 'pars' not specified. Showing first 10 parameters by default.

stan_rhat(model.mat$fit) 
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stan_ess(model.mat$fit)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stan_dens(model.mat$fit, separate_chains = TRUE)
## 'pars' not specified. Showing first 10 parameters by default.

Summary

summary(model.mat)
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: cMASS ~ DEV_TEMP * REGION + cFEMALE_MASS + cDENSITY + cAGE_DAYS + DAYS_IN_TREATMENT + EGG_COUNT + HATCHING_SUCCESS + CLUTCH_ORDER + cLENGTH 
##    Data: growth2 (Number of observations: 1860) 
##   Draws: 4 chains, each with iter = 10000; warmup = 4000; thin = 5;
##          total post-warmup draws = 4800
## 
## Population-Level Effects: 
##                            Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept                     -0.17      0.06    -0.29    -0.05 1.00     4426
## DEV_TEMP30                     0.04      0.02     0.00     0.07 1.00     4615
## DEV_TEMP31.5                   0.08      0.02     0.05     0.12 1.00     4749
## REGIONleading                  0.04      0.02    -0.00     0.09 1.00     4904
## cFEMALE_MASS                  -0.00      0.00    -0.00    -0.00 1.00     4714
## cDENSITY                      -0.02      0.00    -0.02    -0.02 1.00     4434
## cAGE_DAYS                     -0.00      0.00    -0.00     0.00 1.00     4768
## DAYS_IN_TREATMENT             -0.00      0.00    -0.00     0.00 1.00     4777
## EGG_COUNT                      0.00      0.00    -0.00     0.00 1.00     4505
## HATCHING_SUCCESS               0.16      0.03     0.10     0.22 1.00     4759
## CLUTCH_ORDER2                 -0.02      0.02    -0.06     0.02 1.00     4747
## CLUTCH_ORDER3                  0.06      0.03     0.00     0.11 1.00     4807
## cLENGTH                        0.09      0.00     0.09     0.10 1.00     4366
## DEV_TEMP30:REGIONleading      -0.02      0.02    -0.07     0.02 1.00     4908
## DEV_TEMP31.5:REGIONleading    -0.07      0.02    -0.12    -0.02 1.00     4694
##                            Tail_ESS
## Intercept                      4602
## DEV_TEMP30                     4596
## DEV_TEMP31.5                   4597
## REGIONleading                  4901
## cFEMALE_MASS                   4599
## cDENSITY                       4279
## cAGE_DAYS                      4815
## DAYS_IN_TREATMENT              4772
## EGG_COUNT                      4635
## HATCHING_SUCCESS               4609
## CLUTCH_ORDER2                  4728
## CLUTCH_ORDER3                  4773
## cLENGTH                        4291
## DEV_TEMP30:REGIONleading       4691
## DEV_TEMP31.5:REGIONleading     4473
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     0.21      0.00     0.21     0.22 1.00     4799     4649
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

Based on the model.mat output cFEMALE_MASS,cAGE_DAYS, DAYS_IN_TREATMENT, and EGG_COUNT have no credible influence on cMASS. Interestingly, HATCHING SUCCESS seems to have a large influence on cMASS.

Geographic model

model.geo <- brm(cMASS ~ DEV_TEMP*REGION + cFEMALE_MASS + cDENSITY + cAGE_DAYS + FEMALE_LAT + cLENGTH,
              family=gaussian(),
              data = growth2, 
              warmup = 4000, 
              iter = 10000,
              seed=123, 
              cores=2, 
              save_pars = save_pars(all=TRUE),  
              chains = 4, 
              thin = 5) 
## Compiling Stan program...
## Start sampling
prior_summary(model.geo) 

In this summary statistic table it tells us:

  • for the intercept, it is using a student t(flatter normal) prior with a mean of 0.1 and a standard deviation of 3.8. These values are derived from the median and median absolute deviation of the response variable
standist::visualize("student_t(3,-0.1,2.5)", xlim=c(-10,10))

standist::visualize("student_t(3,-0.1,2.5)", xlim=c(-10,10))

MCMC sampling diagnostics

bayesplot

mcmc_plot(model.geo, type='combo')

mcmc_plot(model.geo, type='trace') 
## No divergences to plot.

mcmc_plot(model.geo, type='dens_overlay') 

mcmc_plot(model.geo, type='acf_bar')

mcmc_plot(model.geo, type='rhat_hist')
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

mcmc_plot(model.geo, type='neff_hist') 
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stan plots

stan_trace(model.geo$fit)
## 'pars' not specified. Showing first 10 parameters by default.

stan_ac(model.geo$fit) 
## 'pars' not specified. Showing first 10 parameters by default.

stan_rhat(model.geo$fit) 
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stan_ess(model.geo$fit)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stan_dens(model.geo$fit, separate_chains = TRUE)
## 'pars' not specified. Showing first 10 parameters by default.

Summary

summary(model.geo)
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: cMASS ~ DEV_TEMP * REGION + cFEMALE_MASS + cDENSITY + cAGE_DAYS + FEMALE_LAT + cLENGTH 
##    Data: growth2 (Number of observations: 1959) 
##   Draws: 4 chains, each with iter = 10000; warmup = 4000; thin = 5;
##          total post-warmup draws = 4800
## 
## Population-Level Effects: 
##                            Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept                      1.46      0.29     0.90     2.02 1.00     4680
## DEV_TEMP30                     0.04      0.02     0.01     0.07 1.00     4675
## DEV_TEMP31.5                   0.09      0.02     0.06     0.13 1.00     4757
## REGIONleading                  0.39      0.07     0.25     0.54 1.00     4724
## cFEMALE_MASS                  -0.00      0.00    -0.00    -0.00 1.00     5095
## cDENSITY                      -0.02      0.00    -0.02    -0.01 1.00     4668
## cAGE_DAYS                     -0.00      0.00    -0.00    -0.00 1.00     4648
## FEMALE_LAT                     0.09      0.02     0.06     0.12 1.00     4669
## cLENGTH                        0.09      0.00     0.09     0.10 1.00     4686
## DEV_TEMP30:REGIONleading      -0.02      0.02    -0.07     0.02 1.00     4502
## DEV_TEMP31.5:REGIONleading    -0.08      0.02    -0.12    -0.03 1.00     4455
##                            Tail_ESS
## Intercept                      4618
## DEV_TEMP30                     4413
## DEV_TEMP31.5                   4781
## REGIONleading                  4677
## cFEMALE_MASS                   4301
## cDENSITY                       4749
## cAGE_DAYS                      4613
## FEMALE_LAT                     4539
## cLENGTH                        4331
## DEV_TEMP30:REGIONleading       4409
## DEV_TEMP31.5:REGIONleading     4691
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     0.21      0.00     0.21     0.22 1.00     4666     4648
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

In the geographic model cFEMALE_MASS, and cAGE have no credible influence while.

Model Comparisons

All of our models performed relatively well. To determine which model we will use going forward we will compare the three models we ran against each other. Models will be compared via leave-one-out cross-validation, widely applicable information criterion (waic), and bayes factor.

The model.mat model will not be included in the comparison because the additional covariates within the model did not seem to explain a suitable amount of varirance in the model, ALSO, because there were more ’NA’s in the these added covariates the observation sizes within the models were different meaning the models cannot be compared against each other using standard model comparison tools.

Comparison methods

WAIC

model1.waic <- waic(model1)
## Warning: 
## 3 (0.2%) p_waic estimates greater than 0.4. We recommend trying loo instead.
model.geo.waic <- waic(model.geo) 
## Warning: 
## 3 (0.2%) p_waic estimates greater than 0.4. We recommend trying loo instead.
#model.mat.waic <- waic(model.mat)
loo_compare(model1.waic, model.geo.waic) 
##           elpd_diff se_diff
## model.geo   0.0       0.0  
## model1    -12.7       4.4

LOO

LOO(model1, model.geo) 
## Output of model 'model1':
## 
## Computed from 4800 by 1959 log-likelihood matrix.
## 
##          Estimate    SE
## elpd_loo    242.1  59.2
## p_loo        15.7   1.5
## looic      -484.2 118.4
## ------
## MCSE of elpd_loo is 0.1.
## MCSE and ESS estimates assume MCMC draws (r_eff in [0.9, 1.1]).
## 
## All Pareto k estimates are good (k < 0.7).
## See help('pareto-k-diagnostic') for details.
## 
## Output of model 'model.geo':
## 
## Computed from 4800 by 1959 log-likelihood matrix.
## 
##          Estimate    SE
## elpd_loo    254.7  60.5
## p_loo        16.4   1.6
## looic      -509.5 120.9
## ------
## MCSE of elpd_loo is 0.1.
## MCSE and ESS estimates assume MCMC draws (r_eff in [0.8, 1.1]).
## 
## All Pareto k estimates are good (k < 0.7).
## See help('pareto-k-diagnostic') for details.
## 
## Model comparisons:
##           elpd_diff se_diff
## model.geo   0.0       0.0  
## model1    -12.6       4.4

Bayes facotr

bayes_factor(model1, model.geo)
## Iteration: 1
## Iteration: 2
## Iteration: 3
## Iteration: 4
## Iteration: 5
## Iteration: 1
## Iteration: 2
## Iteration: 3
## Iteration: 4
## Iteration: 5
## Estimated Bayes factor in favor of model1 over model.geo: 0.00004

Model comparisons indicate that model1 outperforms model.geo. From examining our models we find that cFEMALE_MASS and cAGE did not have a strong influence on any of the models, and therefore they will not be included in the final model. However, in the maternal model HATCHING_SUCCESS was found to have an influence on cMASS so we will add this factor into our final model.

Additional we will add two random factors into our model including, (1|LEVEL) and (1|FEMALE).

model1.mass <- brm(cMASS ~ DEV_TEMP*REGION + HATCHING_SUCCESS + cDENSITY + cLENGTH + (1|LEVEL) + (1|FEMALE),
              family=gaussian(link="identity"),
              data = growth2, 
              warmup = 4000, 
              iter = 10000,
              seed=123, 
              cores=2, 
              save_pars = save_pars(all=TRUE),  
              chains = 4, 
              thin = 5, 
              control = list(adapt_delta=0.9))  
## Warning: Rows containing NAs were excluded from the model.
## Compiling Stan program...
## Start sampling
## Warning: There were 96 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.
## Warning: Examine the pairs() plot to diagnose sampling problems
## Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#tail-ess
summary(model1.mass)
## Warning: There were 96 divergent transitions after warmup. Increasing
## adapt_delta above 0.9 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: cMASS ~ DEV_TEMP * REGION + HATCHING_SUCCESS + cDENSITY + cLENGTH + (1 | LEVEL) + (1 | FEMALE) 
##    Data: growth2 (Number of observations: 1860) 
##   Draws: 4 chains, each with iter = 10000; warmup = 4000; thin = 5;
##          total post-warmup draws = 4800
## 
## Group-Level Effects: 
## ~FEMALE (Number of levels: 15) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.07      0.02     0.04     0.12 1.00     3494     4616
## 
## ~LEVEL (Number of levels: 3) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.03      0.05     0.00     0.17 1.01      655      168
## 
## Population-Level Effects: 
##                            Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept                     -0.19      0.08    -0.37    -0.05 1.00     3661
## DEV_TEMP30                     0.03      0.02     0.00     0.07 1.00     4407
## DEV_TEMP31.5                   0.09      0.02     0.06     0.13 1.00     4234
## REGIONleading                  0.04      0.04    -0.04     0.12 1.00     1929
## HATCHING_SUCCESS               0.19      0.10     0.02     0.41 1.00     1508
## cDENSITY                      -0.02      0.00    -0.02    -0.01 1.00     4559
## cLENGTH                        0.09      0.00     0.09     0.09 1.00     2658
## DEV_TEMP30:REGIONleading      -0.02      0.02    -0.07     0.03 1.00     4513
## DEV_TEMP31.5:REGIONleading    -0.08      0.03    -0.13    -0.03 1.00     4523
##                            Tail_ESS
## Intercept                      4146
## DEV_TEMP30                     4575
## DEV_TEMP31.5                   4658
## REGIONleading                   570
## HATCHING_SUCCESS                424
## cDENSITY                       4654
## cLENGTH                        4319
## DEV_TEMP30:REGIONleading       4525
## DEV_TEMP31.5:REGIONleading     4418
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     0.21      0.00     0.20     0.22 1.00     4641     4378
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

There seems to be some small issues. Lets investigate the model to find out more.

#--- distribution check ---#
pp_check(model1.mass, type = 'dens_overlay')
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

#--- DHARMa checks ---#
growth3 <- growth2 |> 
  drop_na(HATCHING_SUCCESS)
preds <- posterior_predict(model1.mass, ndraws=250, summary=FALSE)
model1.mass.resids <- createDHARMa(simulatedResponse = t(preds), 
                              observedResponse = na.omit(growth3$cMASS), 
                              fittedPredictedResponse = apply(preds, 2, median), 
                              integerResponse = 'student')
plot(model1.mass.resids) ; testDispersion(model1.mass.resids)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 0.98425, p-value = 0.728
## alternative hypothesis: two.sided

There are a couple issues with the way the residuals are distributed. Lets try to add a log-link into the model and see if that helps.

model1.mass.log <- brm(cMASS ~ DEV_TEMP*REGION + HATCHING_SUCCESS + cDENSITY + cLENGTH + (1|LEVEL) + (1|FEMALE),
              family=gaussian(link="log"),
              data = growth2, 
              warmup = 4000, 
              iter = 10000,
              seed=123, 
              cores=2, 
              save_pars = save_pars(all=TRUE),  
              chains = 4, 
              thin = 5, 
              control = list(adapt_delta=0.9))  
## Warning: Rows containing NAs were excluded from the model.
## Compiling Stan program...
## Start sampling
## Warning: There were 130 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.
## Warning: Examine the pairs() plot to diagnose sampling problems
summary(model1.mass)
## Warning: There were 96 divergent transitions after warmup. Increasing
## adapt_delta above 0.9 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: cMASS ~ DEV_TEMP * REGION + HATCHING_SUCCESS + cDENSITY + cLENGTH + (1 | LEVEL) + (1 | FEMALE) 
##    Data: growth2 (Number of observations: 1860) 
##   Draws: 4 chains, each with iter = 10000; warmup = 4000; thin = 5;
##          total post-warmup draws = 4800
## 
## Group-Level Effects: 
## ~FEMALE (Number of levels: 15) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.07      0.02     0.04     0.12 1.00     3494     4616
## 
## ~LEVEL (Number of levels: 3) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.03      0.05     0.00     0.17 1.01      655      168
## 
## Population-Level Effects: 
##                            Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept                     -0.19      0.08    -0.37    -0.05 1.00     3661
## DEV_TEMP30                     0.03      0.02     0.00     0.07 1.00     4407
## DEV_TEMP31.5                   0.09      0.02     0.06     0.13 1.00     4234
## REGIONleading                  0.04      0.04    -0.04     0.12 1.00     1929
## HATCHING_SUCCESS               0.19      0.10     0.02     0.41 1.00     1508
## cDENSITY                      -0.02      0.00    -0.02    -0.01 1.00     4559
## cLENGTH                        0.09      0.00     0.09     0.09 1.00     2658
## DEV_TEMP30:REGIONleading      -0.02      0.02    -0.07     0.03 1.00     4513
## DEV_TEMP31.5:REGIONleading    -0.08      0.03    -0.13    -0.03 1.00     4523
##                            Tail_ESS
## Intercept                      4146
## DEV_TEMP30                     4575
## DEV_TEMP31.5                   4658
## REGIONleading                   570
## HATCHING_SUCCESS                424
## cDENSITY                       4654
## cLENGTH                        4319
## DEV_TEMP30:REGIONleading       4525
## DEV_TEMP31.5:REGIONleading     4418
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     0.21      0.00     0.20     0.22 1.00     4641     4378
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

Lets see if the model has improved

#--- distribution check ---#
pp_check(model1.mass.log, type = 'dens_overlay')
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

#--- DHARMa checks ---#
growth3 <- growth2 |> 
  drop_na(HATCHING_SUCCESS)
preds <- posterior_predict(model1.mass.log, ndraws=250, summary=FALSE)
model1.mass.log.resids <- createDHARMa(simulatedResponse = t(preds), 
                              observedResponse = na.omit(growth3$cMASS), 
                              fittedPredictedResponse = apply(preds, 2, median), 
                              integerResponse = 'student')
plot(model1.mass.log.resids) ; testDispersion(model1.mass.log.resids)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 0.84402, p-value < 2.2e-16
## alternative hypothesis: two.sided

Not really, in fact the log link perhaps makes it worse! Maybe we can try a square-root link, as the square root transformation helps if most values are rather small.

Lets see if the model has improved

Partial effects plots

conditional_effects

model1.mass |> 
  conditional_effects(spaghetti = TRUE, ndraws=200) 

Model investigation

summary

summary(model1.mass)
## Warning: There were 96 divergent transitions after warmup. Increasing
## adapt_delta above 0.9 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: cMASS ~ DEV_TEMP * REGION + HATCHING_SUCCESS + cDENSITY + cLENGTH + (1 | LEVEL) + (1 | FEMALE) 
##    Data: growth2 (Number of observations: 1860) 
##   Draws: 4 chains, each with iter = 10000; warmup = 4000; thin = 5;
##          total post-warmup draws = 4800
## 
## Group-Level Effects: 
## ~FEMALE (Number of levels: 15) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.07      0.02     0.04     0.12 1.00     3494     4616
## 
## ~LEVEL (Number of levels: 3) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.03      0.05     0.00     0.17 1.01      655      168
## 
## Population-Level Effects: 
##                            Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept                     -0.19      0.08    -0.37    -0.05 1.00     3661
## DEV_TEMP30                     0.03      0.02     0.00     0.07 1.00     4407
## DEV_TEMP31.5                   0.09      0.02     0.06     0.13 1.00     4234
## REGIONleading                  0.04      0.04    -0.04     0.12 1.00     1929
## HATCHING_SUCCESS               0.19      0.10     0.02     0.41 1.00     1508
## cDENSITY                      -0.02      0.00    -0.02    -0.01 1.00     4559
## cLENGTH                        0.09      0.00     0.09     0.09 1.00     2658
## DEV_TEMP30:REGIONleading      -0.02      0.02    -0.07     0.03 1.00     4513
## DEV_TEMP31.5:REGIONleading    -0.08      0.03    -0.13    -0.03 1.00     4523
##                            Tail_ESS
## Intercept                      4146
## DEV_TEMP30                     4575
## DEV_TEMP31.5                   4658
## REGIONleading                   570
## HATCHING_SUCCESS                424
## cDENSITY                       4654
## cLENGTH                        4319
## DEV_TEMP30:REGIONleading       4525
## DEV_TEMP31.5:REGIONleading     4418
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     0.21      0.00     0.20     0.22 1.00     4641     4378
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

R2

model1.mass |> bayes_R2(summary = FALSE) |> median_hdci()

tidyMCMC

tidyMCMC(model1.mass, estimate.method='median', conf.int=TRUE, conf.method='HPDinterval')

gather draws

#model1.re.wo |> get_variables()
model1.mass |> gather_draws(`b_.*|sigma`, regex =TRUE) |> 
  median_hdci()

bayesplot

model1.mass |> mcmc_plot(type='intervals')

emmeans - pariwise

model1.mass |> emmeans(pairwise ~ REGION*DEV_TEMP, type="response") |> pairs(by="DEV_TEMP") |> summary()

probabilities

model1.mass |> emmeans(~ REGION | DEV_TEMP, at = list(DEV_TEMP = 31.5, length.out = 10)) |> pairs(type = 'response')
## DEV_TEMP = 31.5:
##  contrast       estimate lower.HPD upper.HPD
##  core - leading   0.0384    -0.047     0.122
## 
## Point estimate displayed: median 
## HPD interval probability: 0.95

summary figure

var <- get_variables(model1.mass); var
##  [1] "b_Intercept"                  "b_DEV_TEMP30"                
##  [3] "b_DEV_TEMP31.5"               "b_REGIONleading"             
##  [5] "b_HATCHING_SUCCESS"           "b_cDENSITY"                  
##  [7] "b_cLENGTH"                    "b_DEV_TEMP30:REGIONleading"  
##  [9] "b_DEV_TEMP31.5:REGIONleading" "sd_FEMALE__Intercept"        
## [11] "sd_LEVEL__Intercept"          "sigma"                       
## [13] "Intercept"                    "r_FEMALE[CARL226,Intercept]" 
## [15] "r_FEMALE[CARL345,Intercept]"  "r_FEMALE[CARL349,Intercept]" 
## [17] "r_FEMALE[CARL359,Intercept]"  "r_FEMALE[COTT399,Intercept]" 
## [19] "r_FEMALE[CPRE459,Intercept]"  "r_FEMALE[CPRE524,Intercept]" 
## [21] "r_FEMALE[CPRE533,Intercept]"  "r_FEMALE[CRUS360,Intercept]" 
## [23] "r_FEMALE[CSAT264,Intercept]"  "r_FEMALE[LCHA118,Intercept]" 
## [25] "r_FEMALE[LCHA120,Intercept]"  "r_FEMALE[LCKM155,Intercept]" 
## [27] "r_FEMALE[LCKM156,Intercept]"  "r_FEMALE[LSBI027,Intercept]" 
## [29] "r_LEVEL[1,Intercept]"         "r_LEVEL[2,Intercept]"        
## [31] "r_LEVEL[3,Intercept]"         "lprior"                      
## [33] "lp__"                         "z_1[1,1]"                    
## [35] "z_1[1,2]"                     "z_1[1,3]"                    
## [37] "z_1[1,4]"                     "z_1[1,5]"                    
## [39] "z_1[1,6]"                     "z_1[1,7]"                    
## [41] "z_1[1,8]"                     "z_1[1,9]"                    
## [43] "z_1[1,10]"                    "z_1[1,11]"                   
## [45] "z_1[1,12]"                    "z_1[1,13]"                   
## [47] "z_1[1,14]"                    "z_1[1,15]"                   
## [49] "z_2[1,1]"                     "z_2[1,2]"                    
## [51] "z_2[1,3]"                     "accept_stat__"               
## [53] "stepsize__"                   "treedepth__"                 
## [55] "n_leapfrog__"                 "divergent__"                 
## [57] "energy__"
var1 <- get_variables(model1.mass)[c(1:4,8:9)] ; var1 
## [1] "b_Intercept"                  "b_DEV_TEMP30"                
## [3] "b_DEV_TEMP31.5"               "b_REGIONleading"             
## [5] "b_DEV_TEMP30:REGIONleading"   "b_DEV_TEMP31.5:REGIONleading"
int_draws_spread <- model1.mass |> spread_draws(!!!syms(var1)) 

int_draws_spread2 <- int_draws_spread |> 
  gather_draws(b_Intercept, b_REGIONleading) |> 
  left_join(int_draws_spread, by = c(".chain",".iteration",".draw"))
  
  
int_draws <- int_draws_spread2 |> 
  mutate(x28.5 = case_when(`.variable` == 'b_Intercept' ~ 
                                 `.value`, 
                               `.variable` != 'b_Intercept' ~ 
                                 `.value` + b_Intercept), 
         
         x30 = case_when(`.variable` == 'b_Intercept' ~ 
                                 `.value` + b_DEV_TEMP30, 
                             `.variable` == 'b_REGIONleading' ~ 
                                 `.value` + b_Intercept + b_DEV_TEMP30 + `b_DEV_TEMP30:REGIONleading`), 
         
         x31.5 = case_when(`.variable` == 'b_Intercept' ~ 
                                 `.value` + b_DEV_TEMP31.5, 
                             `.variable` == 'b_REGIONleading' ~ 
                                 `.value` + b_Intercept + b_DEV_TEMP31.5 + `b_DEV_TEMP31.5:REGIONleading`))
  
int_draws_plotting <- int_draws |> 
  pivot_longer(cols = starts_with("x"), 
               names_to = "DEV_TEMP", 
               values_to = "MASS") |> 
  transmute(LATITUDE = case_when(`.variable` == "b_Intercept" ~ "Low latitude", 
                                 `.variable` == "b_REGIONleading" ~ "High latitude"), 
            DEV_TEMP = case_when(DEV_TEMP == 'x28.5' ~ 'DEV_TEMP_28.5', 
                                 DEV_TEMP == 'x30' ~ 'DEV_TEMP_30', 
                                 DEV_TEMP == 'x31.5' ~ 'DEV_TEMP_31.5'),
            MASS = MASS, 
            LATITUDE_B = LATITUDE, 
            DEV_TEMP_B = DEV_TEMP,
            chain = `.chain`, 
            iteration = `.iteration`, 
            draw_n = `.draw`) |> 
  unite("EXP_GROUP",LATITUDE_B, DEV_TEMP_B,sep="_") |> 
  mutate(EXP_GROUP = as.factor(EXP_GROUP)) |> 
  mutate(EXP_GROUP = factor(EXP_GROUP, levels=c("High latitude_DEV_TEMP_28.5","High latitude_DEV_TEMP_30", "High latitude_DEV_TEMP_31.5",
                                                "Low latitude_DEV_TEMP_28.5","Low latitude_DEV_TEMP_30", "Low latitude_DEV_TEMP_31.5")))


mass.plot <- int_draws_plotting |> 
  ggplot(aes(x=EXP_GROUP, y=MASS)) + 
  geom_hline(yintercept = 0, linetype="dashed", linewidth=1, color="grey58", alpha=0.8) + 
  stat_halfeye(aes(fill = EXP_GROUP, fill_ramp = after_stat(level)), 
               point_interval = mode_hdci, 
               .width = c(.66, .90, .95)) + 
  scale_fill_ramp_discrete(na.translate=FALSE, 
                           labels =c("0.95","0.90","0.66"), 
                           name = "Credible interval") +
  scale_fill_manual(values = c("lightskyblue" ,"dodgerblue2", "dodgerblue4","coral", "red2","firebrick4"))+ 
  scale_y_continuous(limits=c(-0.5,0.5), breaks = seq(-0.5,0.5, .1))+
  ylab("CENTERED MASS (g)") + xlab("EXPERIMENTAL GROUP") +
  scale_x_discrete(labels = c("Low latitude_DEV_TEMP_28.5" = paste0("Low latitude 28.5","\u00B0","C"), 
                              "Low latitude_DEV_TEMP_30" = paste0("Low latitude 30","\u00B0","C"),
                              "Low latitude_DEV_TEMP_31.5" = paste0("Low latitude 31.5","\u00B0","C"), 
                              "High latitude_DEV_TEMP_28.5" = paste0("High latitude 28.5","\u00B0","C"),
                              "High latitude_DEV_TEMP_30" = paste0("High latitude 30","\u00B0","C"),
                              "High latitude_DEV_TEMP_31.5" = paste0("High latitude 31.5","\u00B0","C"))) +
  annotate("text", x=6.8,y=1.4, label = paste0(round(mean(growth3$MASS), 2)," (mm)"), color="grey58") +
  coord_flip() + 
  theme_classic() + 
  guides(fill = "none") + 
  theme(legend.position = c(.88,.86), 
        axis.title.y = element_text(margin = margin(r =0.3, unit = "in"), size = 12), 
        axis.title.x = element_text(margin = margin(t = 0.3, unit="in"), size =12), 
        legend.key = element_rect(color="black", size=1.25)); mass.plot
## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2
## 3.5.0.
## ℹ Please use the `legend.position.inside` argument of `theme()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 28 rows containing missing values or values outside the scale range
## (`stat_slabinterval()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text()`).